home *** CD-ROM | disk | FTP | other *** search
-
- Relay-Version: version B 2.10.2 9/5/84; site philabs.UUCP
- Path: philabs!cmcl2!harvard!talcott!panda!sources-request
- From: sources-request@panda.UUCP
- Newsgroups: mod.sources
- Subject: bm version 1.2 (blindingly fast "fgrep")
- Message-ID: <1424@panda.UUCP>
- Date: 20 Feb 86 21:44:56 GMT
- Date-Received: 21 Feb 86 23:14:56 GMT
- Sender: jpn@panda.UUCP
- Organization: Bell-Northern Research, Ottawa, Ontario
- Lines: 896
- Approved: jpn@panda.UUCP
-
- Mod.sources: Volume 4, Issue 1
- Submitted by: decvax!bnr-vpa!pdbain (Peter Bain)
-
-
- This contains version 1.2 bm. Differences from 1.1 include certain
- bug fixes, notably the one which prevented it from finding patterns
- in lines which straddled buffer boundaries.
-
- Please see the README for system dependencies.
-
- ********************** cut here *******************
- #!/bin/sh
- # This is a shell archive, meaning:
- # 1. Remove everything above the #!/bin/sh line.
- # 2. Save the resulting text in a file.
- # 3. Execute the file with /bin/sh (not csh) to create the files:
- # bm.h
- # bm.c
- # Execute.c
- # Extern.h
- # GetPatFile.c
- # Global.c
- # MakeDesc.c
- # MakeSkip.c
- # MatchFound.c
- # MkDescVec.c
- # MoveResidue.c
- # PrintLine.c
- # PutUsage.c
- # Search.c
- # Makefile
- # README
- # bm.1
- # This archive created: Thu Feb 13 09:21:33 1986
- export PATH; PATH=/bin:$PATH
- if test -f 'bm.h'
- then
- echo shar: over-writing existing file "'bm.h'"
- fi
- cat << \SHAR_EOF > 'bm.h'
- #define FIRSTCHAR ' '
- #define MAXCHAR 0377
- #define MAXBUFF 8192
- #define MAXSIZE 100
- #define MAXPATS 100 /* max number of patterns */
- #define PSIZEDEF 1024 /* space for patterns if they come from a tty */
- #define min(x,y) ((x) < (y) ? (x) : (y))
- #define max(x,y) ((x) > (y) ? (x) : (y))
- struct PattDesc {
- short unsigned int *Skip1, *Skip2; /* pointers to skip tables */
- char *Pattern;
- int PatLen; /* pattern length */
- char *Start;
- /* starting position of search (at beginning of pattern) */
- int Success; /* true when pattern found */
- };
- SHAR_EOF
- if test -f 'bm.c'
- then
- echo shar: over-writing existing file "'bm.c'"
- fi
- cat << \SHAR_EOF > 'bm.c'
- #include <stdio.h>
- #include <fcntl.h>
- #include <string.h>
- #include "bm.h"
- #include "Extern.h"
- main(argc,argv)
- int argc;
- char *argv[];
- {
- /* grep based on Boyer-Moore algorithm */
- char BigBuff[MAXBUFF + 2];
- /*
- * We leave one extra character at the beginning and end of the buffer,
- * but don't tell Execute about it. This is so when someone is
- * scanning the buffer and scans past the end (or beginning)
- * we are still technically in the buffer (picky, but there ARE
- * machines which would complain)
- */
- int ret = 1, /* return code from Execute */
- NotFound = 0, /* non-zero if file not readable */
- NFiles,
- NPats; /* number of patterns to search for */
- char *FlagPtr,
- **OptPtr; /* used to scan command line */
- int TextFile /* file to search */;
- struct PattDesc *DescVec[MAXPATS];
-
- --argc;
- OptPtr = argv + 1;
- while ( argc && **OptPtr == '-') {
- FlagPtr = *OptPtr + 1;
- do {
- switch (*FlagPtr) {
- case 'c': cFlag = 1; break;
- case 'e': eFlag = 1;
- /* get the patterns from next arg */
- NPats = MkDescVec(DescVec,*++OptPtr);
- --argc;
- break;
- case 'f': fFlag = 1;
- /* read the patterns from a file */
- NPats = GetPatFile(*++OptPtr,DescVec);
- --argc;
- break;
- case 'l': lFlag = 1; break;
- case 'n': nFlag = 1; break;
- case 's': sFlag = 1; break;
- case 'x': xFlag = 1; break;
- case 'h': hFlag = 1; break;
- default:
- fprintf(stderr,
- "bm: invalid option: -%c \n",
- *FlagPtr);
- PutUsage();
- exit(2);
- } /* switch */
- ++FlagPtr;
- } while (*FlagPtr);
- ++OptPtr; --argc;
- } /* while */
- /* OptPtr now points to patterns */
- if (!fFlag && !eFlag) {
- if (!argc) {
- fprintf(stderr,"bm: no pattern specified\n");
- PutUsage();
- exit(2);
- } else
- NPats = MkDescVec(DescVec,*OptPtr);
- ++OptPtr; --argc;
- }
- /* OptPtr now points to first file */
- NFiles = argc;
- if (!NFiles)
- ret &= Execute(DescVec,NPats,0,BigBuff+1);
- else while (argc--) {
- if ((NFiles > 1) || lFlag) FileName = *OptPtr;
- if ((TextFile = open(*OptPtr,O_RDONLY,0)) < 0) {
- fprintf(stderr,"bm: can't open %s\n",*OptPtr);
- NotFound++;
- }
- else {
- ret &= Execute(DescVec,NPats,TextFile,BigBuff+1);
- if (sFlag && !ret)
- exit(0);
- close(TextFile);
- } /* if */
- ++OptPtr;
- } /* while */
- if (cFlag) printf("%d\n",MatchCount);
- exit(NotFound ? 2 : ret);
- } /* main */
- SHAR_EOF
- if test -f 'Execute.c'
- then
- echo shar: over-writing existing file "'Execute.c'"
- fi
- cat << \SHAR_EOF > 'Execute.c'
- #include <stdio.h>
- #include "bm.h"
- #include "Extern.h"
- Execute(DescVec, NPats, TextFile, Buffer)
- register struct PattDesc *DescVec[];
- /* pointers to status vectors for the different
- * patterns, including skip tables, position in buffer, etc. */
- register int NPats; /* number of patterns */
- register char Buffer[]; /* holds text from file */
- register int TextFile; /* file to search */
- {
- int NRead, /* number of chars read from file */
- NWanted, /* number of chars wanted */
- NAvail, /* number of chars actually read */
- BuffSize, /* number of chars in buffer */
- BuffPos, /* offset of first char in Buffer in TextFile */
- BuffEx, /* flag to indicate that buffer has been searched */
- ResSize,
- /* number of characters in the last, incomplete line */
- Found, /* flag indicates whether pattern found
- * completely and all matches printed */
- Valid; /* was the match "valid", i.e. if -x used,
- * did the whole line match? */
- char *BuffEnd; /* pointer to last char of last complete line */
-
- /* misc working variables */
- int i;
-
- /* initialize */
- ResSize = 0;
- Found = 0;
- BuffPos = 0;
- for (i=0; i < NPats; i++) {
- DescVec[i] -> Success = 0;
- DescVec[i] -> Start = Buffer;
- } /* for */
- /* now do the searching */
- do {
- /* first, read a bufferfull and set up the variables */
- NWanted = MAXBUFF - ResSize; NRead = 0;
- do {
- NAvail =
- read(TextFile,Buffer + ResSize + NRead, NWanted);
- if (NAvail == -1) {
- fprintf(stderr,
- "bm: error reading from input file\n");
- exit(2);
- } /* if */
- NRead += NAvail; NWanted -= NAvail;
- } while (NAvail && NWanted);
- BuffEx = 0;
- BuffSize = ResSize + NRead;
- BuffEnd = Buffer + BuffSize - 1;
- /* locate the end of the last complete line */
- while (*BuffEnd != '\n' && BuffEnd >= Buffer)
- --BuffEnd;
- if (BuffEnd < Buffer)
- BuffEnd = Buffer + BuffSize - 1;
- while (!BuffEx) { /* work through one buffer full */
- BuffEx = 1; /* set it provisionally, then clear
- * it if we find the buffer non-empty */
- for (i=0; i< NPats; i++) {
- if (!DescVec[i]->Success)
- /* if the pattern has not been found */
- DescVec[i]-> Success =
- Search(DescVec[i]->Pattern,
- DescVec[i]->PatLen, BuffEnd,
- DescVec[i]->Skip1, DescVec[i]->Skip2,
- DescVec[i]);
- if (DescVec[i]->Success){
- /* if a match occurred */
- BuffEx = 0;
- Valid = MatchFound(DescVec[i],BuffPos,
- Buffer, BuffEnd);
- Found |= Valid;
- if ((sFlag || lFlag) && Found)
- return(0);
- } /* if */
- } /* for */
- } /* while */
- if(NRead) {
- ResSize = MoveResidue(DescVec,NPats,Buffer,
- Buffer+BuffSize-1);
- BuffPos += BuffSize - ResSize;
- } /* if */
- } while (NRead);
- return(!Found);
- } /* Execute */
- SHAR_EOF
- if test -f 'Extern.h'
- then
- echo shar: over-writing existing file "'Extern.h'"
- fi
- cat << \SHAR_EOF > 'Extern.h'
- /* global flags for bm */
- extern int cFlag, /* true if we want only a count of matching lines */
- eFlag, /* indicates that next argument is the pattern */
- fFlag, /* true if the patterns arew to come from a file */
- lFlag, /* true if we want a list of files containing the pattern */
- nFlag, /* true if we want the character offset of the pattern */
- sFlag, /* true if we want silent mode */
- xFlag, /* true if we want only lines which match entirely */
- hFlag, /* true if we want no filenames in output */
-
- MatchCount; /* count of number of times a search string was found
- * in the text */
- extern char *FileName;
- SHAR_EOF
- if test -f 'GetPatFile.c'
- then
- echo shar: over-writing existing file "'GetPatFile.c'"
- fi
- cat << \SHAR_EOF > 'GetPatFile.c'
- #include <stdio.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include "bm.h"
- int
- GetPatFile(PatFile, DescVec)
- char *PatFile;
- struct PattDesc *DescVec[];
- /* read patterns from a file and set up a pattern descriptor vector */
- {
- extern char *malloc();
- FILE *PFile;
- struct stat StatBuff;
- int PatSize; /* the number of chars in all the patterns */
- char *PatBuff; /* hold the patterns */
- if (!(strlen(PatFile))) {
- fprintf(stderr,"bm: no pattern file given\n");
- exit(2);
- } /* if */
- if (!(PFile = fopen(PatFile,"r"))) {
- fprintf(stderr,"bm: can't open pattern file %s\n",PatFile);
- exit(2);
- } /* if */
- /* find out how big the patterns are */
- if (fstat(fileno(PFile),&StatBuff) == -1) {
- fprintf(stderr,"bm: can't fstat %s\n",PatFile);
- exit(2);
- } /* if */
- if (isatty(fileno(PFile)))
- PatSize = PSIZEDEF;
- else PatSize = StatBuff.st_size;
- if (!PatSize) {
- fprintf(stderr,"bm: pattern file is empty\n");
- exit(2);
- } /* if */
- if (!(PatBuff = malloc(PatSize+1))) {
- fprintf(stderr,"bm: insufficient memory to store patterns\n");
- exit(2);
- } /* if */
- fread(PatBuff,1,PatSize,PFile); /* get the patterns */
- /* make sure the patterns are null-terminated. We can't have
- * nulls in the patterns */
- if (PatBuff[PatSize-1] == '\n')
- PatBuff[PatSize-1] = '\0';
- else
- PatBuff[PatSize] = '\0';
- fclose(PFile);
- return(MkDescVec(DescVec,PatBuff));
- } /* GetPatFile */
- SHAR_EOF
- if test -f 'Global.c'
- then
- echo shar: over-writing existing file "'Global.c'"
- fi
- cat << \SHAR_EOF > 'Global.c'
- /* global flags for bm */
- int cFlag=0, /* true if we want only a count of matching lines */
- eFlag=0, /* indicates that next argument is the pattern */
- fFlag=0, /* true if the patterns are to come from a file */
- lFlag=0, /* true if we want a list of files containing the pattern */
- nFlag=0, /* true if we want the character offset of the pattern */
- sFlag=0, /* true if we want silent mode */
- xFlag=0, /* true if we want only lines which match entirely */
- hFlag=0, /* true if we want no filenames in output */
-
- MatchCount=0; /* count of number of times a search string was found
- * in the text */
- char *FileName = 0;
- SHAR_EOF
- if test -f 'MakeDesc.c'
- then
- echo shar: over-writing existing file "'MakeDesc.c'"
- fi
- cat << \SHAR_EOF > 'MakeDesc.c'
- #include <stdio.h>
- #include "bm.h"
- #include "Extern.h"
- extern char * malloc();
- /* makes a pattern descriptor */
- struct PattDesc *MakeDesc(Pattern)
- char *Pattern;
- {
- struct PattDesc *Desc;
- Desc = (struct PattDesc *) malloc(sizeof(struct PattDesc));
- if (!(Desc->Skip1 = (unsigned short int *)
- malloc( sizeof(int) * (MAXCHAR + 1)))){
- fprintf(stderr,"bm: can't allocate space\n");
- exit(2);
- } /* if */
- if (!(Desc->Skip2 = (unsigned short int *)
- malloc(sizeof(int) * strlen(Pattern)))){
- fprintf(stderr,"bm: can't allocate space\n");
- exit(2);
- } /* if */
- Desc->Pattern=Pattern;
- Desc->PatLen = strlen(Desc->Pattern);
- MakeSkip(Desc->Pattern,Desc->Skip1,
- Desc->Skip2,Desc->PatLen);
- return(Desc);
- } /* main */
- SHAR_EOF
- if test -f 'MakeSkip.c'
- then
- echo shar: over-writing existing file "'MakeSkip.c'"
- fi
- cat << \SHAR_EOF > 'MakeSkip.c'
- #include <stdio.h>
- #include "bm.h"
- extern char *malloc();
-
- MakeSkip(Pattern,Skip1,Skip2,PatLen)
- char Pattern[];
- unsigned short int Skip1[], Skip2[];
- int PatLen;
- /* generate the skip tables for Boyer-Moore string search algorithm.
- * Skip1 is the skip depending on the character which failed to match
- * the pattern, and Skip2 is the skip depending on how far we got into
- * the pattern. Pattern is the search pattern and PatLen is strlen(Pattern) */
- {
- int *BackTrack; /* backtracking table for t when building skip2 */
- int c; /* general purpose constant */
- int j,k,t,tp; /* indices into Skip's and BackTrack */
-
- if (!(BackTrack = (int *) malloc(PatLen * (sizeof (int))))){
- fprintf(stderr,"bm: can't allocate space\n");
- exit(2);
- } /* if */
- for (c=0; c<=MAXCHAR; ++c)
- Skip1[c] = PatLen;
- for (k=0;k<PatLen;k++) {
- Skip1[Pattern[k]] = PatLen - k - 1;
- Skip2[k] = 2 * PatLen - k - 1;
- } /* for */
- for (j=PatLen - 1,t=PatLen;j >= 0; --j,--t) {
- BackTrack[j] = t;
- while (t<PatLen && Pattern[j] != Pattern[t]) {
- Skip2[t] = min(Skip2[t], PatLen - j - 1);
- t = BackTrack[t];
- } /* while */
- } /* for */
- for (k=0;k<=t;++k)
- Skip2[k] = min(Skip2[k],PatLen+t-k);
- tp=BackTrack[t];
- while(tp<PatLen) {
- while(t<PatLen) {
- Skip2[t] = min(Skip2[t],tp-t+PatLen);
- ++t;
- } /* while */
- tp = BackTrack[tp];
- } /* while */
- cfree(BackTrack);
- } /* MakeSkip */
- SHAR_EOF
- if test -f 'MatchFound.c'
- then
- echo shar: over-writing existing file "'MatchFound.c'"
- fi
- cat << \SHAR_EOF > 'MatchFound.c'
- #include <stdio.h>
- #include "bm.h"
- #include "Extern.h"
- MatchFound(Desc, BuffPos, Buffer, BuffEnd)
- struct PattDesc *Desc; /* state info about search for one string */
- int BuffPos; /* offset of first char of buffer into the file being searched */
- char *Buffer, /* pointer to the first character in the buffer */
- *BuffEnd; /* pointer to the last character in the buffer */
- {
- char *MLineBegin, *MLineEnd;
- Desc->Success = 0;
- /* Start points to first character after a successful match */
- MLineBegin = MLineEnd = Desc->Start - 1;
- while(MLineBegin >=Buffer && *MLineBegin != '\n') --MLineBegin;
- ++MLineBegin;
- while( MLineEnd <= BuffEnd && *MLineEnd != '\n') ++MLineEnd;
- if (MLineEnd > BuffEnd) --MLineEnd;
- /* fixed 25jun85 pdbain. suppress multiple matches of the same
- * pattern on one line */
- Desc->Start = MLineEnd + 1;
- /* check if exact match */
- if (xFlag && !( Desc->PatLen == (*MLineEnd != '\n' ? ((MLineEnd -
- MLineBegin) + 1) : (MLineEnd - MLineBegin))))
- return(0); /* failure */
- if (sFlag) return(1);
- if (cFlag) {
- ++MatchCount;
- return(1);
- } /* if */
- PrintLine(BuffPos+(Desc->Start-Buffer),MLineBegin,MLineEnd);
- return(1);
- } /* MatchFound */
- SHAR_EOF
- if test -f 'MkDescVec.c'
- then
- echo shar: over-writing existing file "'MkDescVec.c'"
- fi
- cat << \SHAR_EOF > 'MkDescVec.c'
- #include "bm.h"
- #include <string.h>
- /* scan a newline-separated string of patterns and set up the
- * vector of descriptors, one pattern descriptor per pattern.
- * Return the number of patterns */
- int
- MkDescVec(DescVec, Pats)
- struct PattDesc *DescVec[];
- char *Pats;
- {
- int NPats = 0;
- char *EndPat;
- extern struct PattDesc *MakeDesc();
- /* some systems use "strchr" instead of "index" */
- /* while (*Pats && (EndPat = index(Pats,'\n')) && NPats < MAXPATS) { */
- while (*Pats && (EndPat = strchr(Pats,'\n')) && NPats < MAXPATS) {
- *EndPat = '\0';
- DescVec[NPats] = MakeDesc(Pats);
- Pats = EndPat + 1;
- ++NPats;
- } /* while */
- if (*Pats && NPats < MAXPATS) {
- DescVec[NPats] = MakeDesc(Pats);
- ++NPats;
- } /* if */
- return(NPats);
- } /* MkDescVec */
- SHAR_EOF
- if test -f 'MoveResidue.c'
- then
- echo shar: over-writing existing file "'MoveResidue.c'"
- fi
- cat << \SHAR_EOF > 'MoveResidue.c'
- #include "bm.h"
- /* Moves the text which has not been completely searched at the end of
- * the buffer to the beginning of the buffer
- * and returns number of bytes moved */
- int MoveResidue(DescVec, NPats,Buffer, BuffEnd)
- struct PattDesc *DescVec[];
- int NPats;
- char *Buffer, *BuffEnd;
- {
- char *FirstStart, *Residue;
- /* use this declaration if you don't use "bcopy" */
- register char *f, *t;
- int ResSize, i;
- FirstStart = BuffEnd;
- /* find the earliest point which has not been
- * completely searched */
- for (i=0; i < NPats; i++) {
- FirstStart =
- min(FirstStart, DescVec[i]-> Start );
- } /* for */
- /* now scan to the beginning of the line containing the
- * unsearched text */
- for (Residue = FirstStart; *Residue != '\n' &&
- Residue >= Buffer; Residue--);
- if (Residue < Buffer)
- Residue = FirstStart;
- else ++Residue;
- ResSize = (BuffEnd - Residue + 1);
- /* now move the residue to the beginning of
- * the file */
- /* use this if you don't have "bcopy" */
- t = Buffer; f = Residue;
- for(i=ResSize;i;--i)
- *t++ = *f++;
- /* use this if you do have "bcopy"
- bcopy(Residue, Buffer, ResSize);
- */
- /* now fix the status vectors */
- for (i=0; i < NPats; i++) {
- DescVec[i]->Start -= (Residue - Buffer );
- } /* for */
- return(ResSize);
- } /* MoveResidue */
- SHAR_EOF
- if test -f 'PrintLine.c'
- then
- echo shar: over-writing existing file "'PrintLine.c'"
- fi
- cat << \SHAR_EOF > 'PrintLine.c'
- #include <stdio.h>
- #include <string.h>
- #include "Extern.h"
- PrintLine(OffSet,LineStart,LineEnd)
- int OffSet; /* offset of LineStart from beginning of file */
- char *LineStart,
- *LineEnd;
- {
- char OffStr[80];
- if (lFlag) {
- if (strlen(FileName) > 76) {
- fprintf(stderr,"bm: filename too long\n");
- exit(2);
- } /* if */
- if (strlen(FileName)) {
- sprintf(OffStr,"%s\n",FileName);
- write(1,OffStr,strlen(OffStr));
- } /* if */
- return;
- } /* if */
- if (FileName && !hFlag) {
- if (strlen(FileName) > 76) {
- fprintf(stderr,"bm: filename too long\n");
- exit(2);
- } /* if */
- sprintf(OffStr,"%s:",FileName);
- write(1,OffStr,strlen(OffStr));
- } /* if */
- if (nFlag) {
- sprintf(OffStr,"%d: ",OffSet);
- write(1,OffStr,strlen(OffStr));
- } /* if */
- write(1,LineStart,LineEnd-LineStart+1);
- if (*LineEnd != '\n') write (1,"\n",1);
- } /* PrintLine */
- SHAR_EOF
- if test -f 'PutUsage.c'
- then
- echo shar: over-writing existing file "'PutUsage.c'"
- fi
- cat << \SHAR_EOF > 'PutUsage.c'
- #include <stdio.h>
- PutUsage()
- {
- fprintf(stderr,
- "bm: search for a given string or strings in a file or files\n");
- fprintf(stderr,
- "synopsis: bm [option]* [string(s)] [file]*\n");
- fprintf(stderr,
- "options:\n");
- fprintf(stderr,
- "-c print only a count of matching lines \n");
- fprintf(stderr,
- "-e Take next argument as the pattern\n");
- fprintf(stderr,
- "-f PFile read patterns from a file PFile\n");
- fprintf(stderr,
- "-h Do not print file names\n");
- fprintf(stderr,
- "-l print a list of files containing the pattern(s) \n");
- fprintf(stderr,
- "-n print the character offset of the pattern(s) \n");
- fprintf(stderr,
- "-s silent mode. Return only success (0) or failure (1)\n");
- fprintf(stderr,
- "-x print only lines which match entirely \n");
- } /*PutUsage */
- SHAR_EOF
- if test -f 'Search.c'
- then
- echo shar: over-writing existing file "'Search.c'"
- fi
- cat << \SHAR_EOF > 'Search.c'
- #include "bm.h"
- #include "Extern.h"
- int Search(Pattern,PatLen,EndBuff, Skip1, Skip2, Desc)
- register char Pattern[];
- int PatLen;
- char *EndBuff;
- unsigned short int Skip1[], Skip2[];
- struct PattDesc *Desc;
- {
- register char *k, /* indexes text */
- *j; /* indexes Pattern */
- register int Skip; /* skip distance */
- char *PatEnd,
- *BuffEnd; /* pointers to last char in Pattern and Buffer */
- BuffEnd = EndBuff;
- PatEnd = Pattern + PatLen - 1;
-
- k = Desc->Start;
- Skip = PatLen-1;
- while ( Skip <= (BuffEnd - k) ) {
- j = PatEnd;
- k = k + Skip;
- while (*j == *k) {
- if (j == Pattern) {
- /* found it. Start next search
- * just after the pattern */
- Desc -> Start = k + Desc->PatLen;
- return(1);
- } /* if */
- --j; --k;
- } /* while */
- Skip = max(Skip1[*(unsigned char *)k],Skip2[j-Pattern]);
- } /* while */
- Desc->Start = k+Skip-(PatLen-1);
- return(0);
- } /* Search */
- SHAR_EOF
- if test -f 'Makefile'
- then
- echo shar: over-writing existing file "'Makefile'"
- fi
- cat << \SHAR_EOF > 'Makefile'
- CCFLAGS = -O
- SOURCES = bm.h bm.c Execute.c Extern.h\
- GetPatFile.c Global.c MakeDesc.c MakeSkip.c \
- MatchFound.c \
- MkDescVec.c MoveResidue.c PrintLine.c PutUsage.c Search.c
- OBJECTS = bm.o Execute.o \
- GetPatFile.o Global.o MakeDesc.o MakeSkip.o \
- MatchFound.o \
- MkDescVec.o MoveResidue.o Search.o PrintLine.o PutUsage.o
- BASEFILES = $(SOURCES) Makefile README bm.1
- bm: $(OBJECTS)
- cc -s -o bm $(CCFLAGS) $(OBJECTS)
- install: bm
- rm -f /usr/bin/bm
- cp bm /usr/bin/bm
- chmod ugo-w /usr/bin/bm
- # rm /usr/src/public/bm/*
- # cp $(BASEFILES) /usr/src/public/bm
- shar:
- /usr/local/bin/shar $(BASEFILES) >bm.shar
- man: /usr/man/man1/bm.1
- /usr/man/man1/bm.1: bm.1
- rm -f /usr/man/man1/bm.1
- cp bm.1 /usr/man/man1/bm.1
- man bm > /dev/null
- bm.o: bm.c bm.h Extern.h
- cc -c $(CCFLAGS) bm.c
- PutUsage.o: PutUsage.c bm.h
- cc -c $(CCFLAGS) PutUsage.c
- MakeSkip.o: MakeSkip.c bm.h
- cc -c $(CCFLAGS) MakeSkip.c
- Search.o: Search.c bm.h Extern.h
- cc -c $(CCFLAGS) Search.c
- Execute.o: Execute.c bm.h
- cc -c $(CCFLAGS) Execute.c
- MoveResidue.o: MoveResidue.c bm.h Extern.h
- cc -c $(CCFLAGS) MoveResidue.c
- MatchFound.o: MatchFound.c bm.h Extern.h
- cc -c $(CCFLAGS) MatchFound.c
- PrintLine.o: PrintLine.c Extern.h
- cc -c $(CCFLAGS) PrintLine.c
- MkDescVec.o: MkDescVec.c bm.h
- cc -c $(CCFLAGS) MkDescVec.c
- GetPatFile.o: GetPatFile.c bm.h
- cc -c $(CCFLAGS) GetPatFile.c
- MakeDesc.o: MakeDesc.c bm.h
- cc -c $(CCFLAGS) MakeDesc.c
- Global.o: Global.c
- cc -c $(CCFLAGS) Global.c
- listing:
- # use -o for Sys V, -i for 4.2BSD
- # print -i3 $(BASEFILES)
- print -o3 $(BASEFILES)
- clean:
- rm -f *.o a.out foo bar blat junk core
- SHAR_EOF
- if test -f 'README'
- then
- echo shar: over-writing existing file "'README'"
- fi
- cat << \SHAR_EOF > 'README'
- Bm is a fast pattern matching utility, intended to be almost
- identical in functionality to fgrep (ugh!) but much faster. It uses
- the Boyer-Moore algorithm, as described in the papers listed below:
-
- D.E. Knuth, J.H. Morris, V.R. Pratt,"Fast Pattern Matching in Strings",
- SIAM J. Comput., 6(2), June 1977, 323-350,
-
- Z. Galil,
- "On Improving the Worst Case Running Time of the Boyer-Moore String
- Matching Algorithm",
- CACM, 22(9), Sept. 1979, ACM,
-
- R.S. Boyer, J.S. Moore,"A Fast String Searching Algorithm", CACM, 20(10),
- Oct. 1977, 762-772,
-
- G. de V. Smit,"A Comparison of Three String Matching Algorithms",
- Software - Practice and Experience, vol. 12, 1982, 57-66,
-
- *** NOTE *** There are certain system dependencies in the code.
- Please check whether your system uses "index" or "strchr" to
- find a character in a string: this affects MkDescVec.c.
- Also check whether your system uses <strings.h> or <string.h>.
- This affects match.c/bm.c, MkDescVec.c, and PrintLine.c.
- Also check whether your system has "bcopy". If so, see MoveResidue.c
-
- The files are MkDescVec.c, PrintLine.c, bm.c, and
- Execute.c: search a file for the patterns
- Extern.h: declarations of externs
- GetPatFile.c: read in patterns from a file and set up a vector of
- pattern descriptors
- Global.c: global variables (complement to Extern.h)
- MakeDesc.c: create a pattern descriptor for one pattern, including
- skip tables, etc.
- MakeSkip.c: make the skip tables for one pattern
- Makefile: you can figure this one out for yourself
- MatchFound.c: what to do when you actually FIND a pattern - print it,
- update flags, etc.
- MkDescVec.c: make a vector of pattern descriptors, given a string
- of newline-separated patterns
- MoveResidue.c: when you come to the end of the buffer, move the
- unsearched "residue" to the beginning and start again
- PrintLine.c: print the appropriate stuff after finding a match
- PutUsage.c: mini-man page.
- README: this file
- Search.c: the guts. Implements B-M algorithm given a pattern, skip
- tables for the pattern, and a buffer to search
- bm.c: mainline. mostly interpreting the command line and tidying
- up at the end. Calls Execute for each file.
- bm.h: constants
- bm.p: man page
- SHAR_EOF
- if test -f 'bm.1'
- then
- echo shar: over-writing existing file "'bm.1'"
- fi
- cat << \SHAR_EOF > 'bm.1'
- .TH BM (1) "8 July 1985"
- .UC 4
- .SH NAME
- bm \- search a file for a string
- .SH SYNOPSIS
- .B /usr/bin/bm
- [ option ] ...
- [ strings ]
- [ file ]
- .SH DESCRIPTION
- .I Bm
- searches the input
- .I files
- (standard input default) for lines matching a string.
- Normally, each line found is copied to the standard output.
- It is blindingly fast.
- .I Bm
- strings are fixed sequences of characters:
- there are no wildcards, repetitions, or other features
- of regular expressions.
- Bm is also case sensitive.
- The following options are recognized.
- .TP
- .B \-x
- (Exact) only lines matched in their entirety are printed
- .TP
- .B \-l
- The names of files with matching lines are listed (once) separated by newlines.
- .TP
- .B \-c
- Only a count of the number of matches
- is printed
- .TP
- .B \-e "string"
- The string is the next argument after the
- .B \-e
- flag. This allows strings beginning with '-'.
- .TP
- .B \-h
- No filenames are printed, even if multiple files are searched.
- .TP
- .B \-n
- Each line is preceded by the number
- of characters from the beginning of the file
- to the match.
- .TP
- .B \-s
- Silent mode. Nothing is printed (except error messages).
- This is useful for checking the error status.
- .TP
- .BI \-f " path"
- The string list
- is taken from the
- .I path.
- This may be either a file or a tty.
- .LP
- Unless the
- .B \-h
- option is specified
- the file name is shown if there is more than one input file.
- Care should be taken when using the characters $ * [ ^ | ( ) and \\ in the
- .I strings
- (listed on the command line)
- as they are also meaningful to the Shell. It is safest to enclose the entire
- .I expression
- argument in single quotes \' \'.
- .LP
- .I Bm
- searches for lines that contain one of the (newline-separated)
- .I strings,
- using
- the Boyer-Moore algorithm.
- It is far superior in terms of speed to the grep (egrep, fgrep)
- family of pattern matchers for fixed-pattern searching,
- and its speed increases with pattern length.
- .SH "SEE ALSO"
- grep(1)
- .SH DIAGNOSTICS
- Exit status is 0 if any matches are found,
- 1 if none, 2 for syntax errors or inaccessible files.
- .SH AUTHOR
- Peter Bain (pdbain@bnr-vpa), with modifications suggested by John Gilmore
- and Amir Plivatsky
- .SH BUGS
- Only 100 patterns are allowed.
- .LP
- Patterns may not contain newlines.
- .LP
- If a line (delimited by newlines, and the beginning and end of the file)
- is longer than 8000 charcters (e.g. in a core dump),
- it will not be completely printed.
- .LP
- If multiple patterns are specified, the order of the ouput lines is not
- necessarily the same as the order of the input lines.
- .LP
- A line will be printed once for each different string on that line.
- .LP
- The algorithm cannot count lines.
- .LP
- The
- .B -n
- and
- .B -c
- work differently from fgrep.
- .LP
- The
- .B -v,
- .B -i,
- and
- .B -b
- are not available.
- SHAR_EOF
- # End of shell archive
- exit 0
-
-
- % t argument after the
- .B \-e
- flag. This allows strings beginning with '-'.
- .TP
- .B \-h
- No filenames are printed,